* Input:
* OrigData/accounts_geocoded_20151015.csv
* OrigData/1270055006_CG_POSTCODE_2011_SA3_2011.xls

* Output:
* Data/Addresses
* Data/postcode_SA3.dta
* Data/Addresses_SA3.dta
* Data/postcode_locality.dta



********************************************************************
**************************** ACCOUNT DATA - GEOCODES     ***********
********************************************************************

* Household street names
insheet using OrigData/accounts_geocoded_20151015.csv, clear
	keep account_id road_name road_type locality postcode
	sort account_id


save Data/Addresses, replace



******************************************************************
****************** GET POSTCODE TO SA3 MAP ***********************
******************************************************************

import excel using OrigData/1270055006_CG_POSTCODE_2011_SA3_2011.xls, sheet("Table 3") cellrange(B8:E3636) clear
	rename B postcode 
	rename C SA3
	drop D 
	rename E ratio

	destring postcode, replace

	keep if postcode >=3000 & postcode < 4000

	sort postcode ratio
	
	collapse (last) SA3, by(postcode)

save Data/postcode_SA3.dta, replace




**********************************************
********** MATCH POSTCODE TO SA3 *************
**********************************************


use Data/Addresses.dta, clear

	gen pcode_orig= postcode

	* postcodes missing in census areas 
	replace postcode =3214 if postcode == 3213 
	replace postcode = 3235 if postcode ==3234 

	merge m:1 postcode using Data/postcode_SA3.dta

	keep if _merge ==3
	drop _merge

	drop postcode 

	rename pcode postcode 

	destring SA3, replace

save Data/Addresses_SA3.dta, replace






******************************************************************
****************** GET POSTCODE TO LOCALITY MAP ******************
******************************************************************
import excel using OrigData/HousePrices/LocalityFinder.xls, firstrow cellrange(A3:B3882) clear

* a locality matches to a single postcode except for "Melbourne" which has postcode 3000 and 3004 - most are 3000
* a postcode can have multiple localities

	rename LocalityName locality 
	rename PostCode postcode

	destring postcode, replace

	* duplicate locality postcode pairs because map comes from electorates 
	duplicates drop

	replace locality = upper(locality)

	split locality 


save Data/postcode_locality.dta, replace


